; -*- mode: scheme; coding: utf-8 -*-
; Replacement for Guile C-based array system - Benchmarks

; (c) Daniel Llorens - 2016-2021
; This library is free software; you can redistribute it and/or modify it under
; the terms of the GNU General Public License as published by the Free
; Software Foundation; either version 3 of the License, or (at your option) any
; later version.

; Run with $GUILE -L mod bench/bench.scm

(import (newra) (newra tools) (newra read)
        (only (newra print) ra-print) (test misc)
        (ice-9 popen) (ice-9 rdelim) (srfi srfi-26) (srfi srfi-8) (srfi srfi-19)
        (only (srfi srfi-1) fold iota) (rnrs bytevectors) (ice-9 match) (ice-9 format)
        (only (srfi srfi-43) vector-copy!) (only (rnrs base) vector-map))

(define (command-output cmd . args)
  (let* ((p (apply open-pipe* OPEN_READ cmd args))
         (s (read-delimited "" p))
         (ec (status:exit-val (close-pipe p))))
    (values s ec)))

;; (format #t "Guile ~a\n~!" (version))
;; (format #t "newra ~a ~a\n~!"
;;         (string-trim-both (command-output "git" "describe" "--always" "--dirty"))
;;         (date->string (current-date) "~4"))

(define (format-header . x)
  (apply format #t
    (string-append (apply string-append "~30t" (map (const "~15@a") x))
                   "\n")
    x))

(define (format-line . x)
  (apply format #t
    (string-append (apply string-append "~30t" (map (const "~15,3f") x))
                   "\n")
    x))


; -----------------------
; benchmarks
; -----------------------

(let ((m #e5e5))
  (format #t "\nlookup, rank 1\n==================\n")
  (format-header "ra-ref" "ra-cell" "ra-appl" "array-ref" "native-ref")
  (for-each
      (lambda (type native-ref)
        (let* ((rank 1)
               (n (inexact->exact (ceiling (expt m (/ rank)))))
               (nn (make-list rank n))
               (len (fold * 1 nn))
               (scale (* 1e8 (/ len)))
               (ra (as-ra (make-ra-root (make-aseq) (apply c-dims nn)) #:type type))
               (ro (ra-root ra))
               (a (ra->array ra))
               (ras 0)
               (as 0)
               (nas 0)
; FIXME test ra-ref / ra-cell / (ra ...)
               (raf-ref (lambda (i) (set! ras (+ ras (ra-ref ra i)))))
               (raf-cell (lambda (i) (set! ras (+ ras (ra-cell ra i)))))
               (raf-appl (lambda (i) (set! ras (+ ras (ra i)))))
               (af (lambda (i) (set! as (+ as (array-ref a i)))))
               (naf (lambda (i) (set! nas (+ nas (native-ref ro i))))))
          (unless (= nas ras as) (throw 'error-in-ra-cell-array-ref-check))
          (format #t "type ~a ~a:" type nn)
          (format-line (* scale (time (ra-loop ra raf-ref)))
                       (* scale (time (ra-loop ra raf-cell)))
                       (* scale (time (ra-loop ra raf-appl)))
                       (* scale (time (array-loop a af)))
                       (* scale (time (ra-loop ra naf))))))
    '(#t f64 u32)
    (list vector-ref f64vector-ref u32vector-ref)))

(let ((m #e1e5))
  (format #t "\nlookup, any rank\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\n~a\n---------" type)
        (format-header "ra-ref" "ra-cell" "ra-appl" "array-ref")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra (as-ra (make-ra-root (make-aseq) (apply c-dims nn)) #:type type))
                     (a (ra->array ra))
                     (ras 0)
                     (as 0)
; FIXME test ra-ref / ra-cell / (ra ...)
                     (raf-ref (case-lambda ((i) (set! ras (+ ras (ra-ref ra i))))
                                           ((i j) (set! ras (+ ras (ra-ref ra i j))))
                                           ((i j k) (set! ras (+ ras (ra-ref ra i j k))))
                                           ((i j k l) (set! ras (+ ras (ra-ref ra i j k l))))
                                           ((i j k l m) (set! ras (+ ras (ra-ref ra i j k l m))))))
                     (raf-cell (case-lambda ((i) (set! ras (+ ras (ra-cell ra i))))
                                            ((i j) (set! ras (+ ras (ra-cell ra i j))))
                                            ((i j k) (set! ras (+ ras (ra-cell ra i j k))))
                                            ((i j k l) (set! ras (+ ras (ra-cell ra i j k l))))
                                            ((i j k l m) (set! ras (+ ras (ra-cell ra i j k l m))))))
                     (raf-appl (case-lambda ((i) (set! ras (+ ras (ra i))))
                                            ((i j) (set! ras (+ ras (ra i j))))
                                            ((i j k) (set! ras (+ ras (ra i j k))))
                                            ((i j k l) (set! ras (+ ras (ra i j k l))))
                                            ((i j k l m) (set! ras (+ ras (ra i j k l m))))))
                     (af (case-lambda ((i) (set! as (+ as (array-ref a i))))
                                      ((i j) (set! as (+ as (array-ref a i j))))
                                      ((i j k) (set! as (+ ras (array-ref a i j k))))
                                      ((i j k l) (set! as (+ ras (array-ref a i j k l))))
                                      ((i j k l m) (set! as (+ ras (array-ref a i j k l m)))))))
                (unless (= ras as) (throw 'error-in-ra-cell-array-ref-check))
                (format #t "rank ~a ~a:" rank nn)
                (format-line (* scale (time (ra-loop ra raf-ref)))
                             (* scale (time (ra-loop ra raf-cell)))
                             (* scale (time (ra-loop ra raf-appl)))
                             (* scale (time (array-loop a af))))))
          (iota 5 1)))
    '(#t f64)))

(let ((m #e1e5))
  (format #t "\niteration\n==================\n")
  (for-each
      (lambda (type)
        (for-each
            (lambda (nargs)
              (format #t "\n~a ~a args\n---------" type nargs)
              (format-header "ra-sfe" "array-sfe" "ra-map" "array-map" "ra-fe" "array-fe")
              (for-each
                  (lambda (rank)
                    (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                           (nn (make-list rank n))
                           (len (fold * 1 nn))
                           (scale (* 1e8 (/ len)))
                           (ra20 (make-ra-new type *unspecified* (apply c-dims nn)))
                           (ra21 (ra-map*! ra-slice-for-each (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                           (ra22 (ra-map*! ra-slice-for-each (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                           (ra23 (ra-map*! ra-slice-for-each (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                           (a20 (ra->array ra20))
                           (a21 (ra->array ra21))
                           (a22 (ra->array ra22))
                           (a23 (ra->array ra23)))
                      (let-syntax ((feop
                                    (syntax-rules ()
                                      ((_ fe a ...)
                                       (let ((k 0)) (fe (lambda (a ...) (set! k (+ k a ...))) a ...))))))
                        (format #t "rank ~a ~a:" rank nn)
                        (case nargs
                          ((4)
                           (format-line (* scale (time (ra-map*! ra-slice-for-each ra20 - ra21 ra22 ra23)))
                                        (* scale (time (array-map*! a20 - a21 a22 a23)))
                                        (* scale (time (ra-map! ra20 - ra21 ra22 ra23)))
                                        (* scale (time (array-map! a20 - a21 a22 a23)))
                                        (* scale (time (feop ra-for-each ra20 ra21 ra22 ra23)))
                                        (* scale (time (feop array-for-each a20 a21 a22 a23)))))
                          ((3)
                           (format-line (* scale (time (ra-map*! ra-slice-for-each ra20 - ra21 ra22)))
                                        (* scale (time (array-map*! a20 - a21 a22)))
                                        (* scale (time (ra-map! ra20 - ra21 ra22)))
                                        (* scale (time (array-map! a20 - a21 a22)))
                                        (* scale (time (feop ra-for-each ra20 ra21 ra22)))
                                        (* scale (time (feop array-for-each a20 a21 a22)))))
                          ((2)
                           (format-line (* scale (time (ra-map*! ra-slice-for-each ra20 - ra21)))
                                        (* scale (time (array-map*! a20 - a21)))
                                        (* scale (time (ra-map! ra20 - ra21)))
                                        (* scale (time (array-map! a20 - a21)))
                                        (* scale (time (feop ra-for-each ra20 ra21)))
                                        (* scale (time (feop array-for-each a20 a21)))))
                          ((1)
                           (format-line (* scale (time (ra-map*! ra-slice-for-each ra20 (cut random n))))
                                        (* scale (time (array-map*! a20 (cut random n))))
                                        (* scale (time (ra-map! ra20 (cut random n))))
                                        (* scale (time (array-map! a20 (cut random n))))
                                        (* scale (time (feop ra-for-each ra20)))
                                        (* scale (time (feop array-for-each a20)))))))))
                (iota 6 1)))
          (iota 4 1)))
    '(#t f64)))

(let ((m #e5e5))
  (format #t "\ncopy\n==================\n")
  (for-each
   (match-lambda
     ((typesrc typedst transposed?)
      (format #t "\nsrc ~a -> dst ~a transposed: ~a\n---------" typesrc typedst transposed?)
      (format-header "ra" "array")
      (for-each
       (lambda (rank)
         (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                (nn (make-list rank n))
                (len (fold * 1 nn))
                (scale (* 1e8 (/ len)))
                (ra20 (make-ra-new typesrc *unspecified* (apply c-dims nn)))
                (ra21 (ra-map! (make-ra-new typedst 0 (apply c-dims nn)) (cut random n)))
                (ra21 (if transposed?
                        (apply ra-transpose ra21 (reverse (iota rank)))
                        ra21))
                (a20 (ra->array ra20))
                (a21 (ra->array ra21)))
           (format #t "rank ~a ~a:" rank nn)
           (format-line (* scale (time (ra-copy! ra20 ra21)))
                        (* scale (time (array-copy! a21 a20))))))
       (iota 6 1))))
   '((#t #t #f)
     (f64 f64 #f)
     (#t f64 #f)
     (#t #t #t)
     (f64 f64 #t)
     (#t f64 #t))))

(for-each
 (lambda (type native-copy! native-length)
   (let ((m #e1e7)
         (rank 1)
         (typesrc type)
         (typedst type))
     (format #t "\n~a ra-copy! array-copy! native-copy!\n==================\n" typesrc)
     (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
            (nn (make-list rank n))
            (len (fold * 1 nn))
            (scale (* 1e9 (/ len)))
            (ra20 (make-ra-new typesrc *unspecified* (apply c-dims nn)))
            (ra21 (make-ra-new typedst 0 (apply c-dims nn)))
            (ra21 (ra-map! ra21 (cut random 256)))
            (a20 (ra->array ra20))
            (a21 (ra->array ra21)))
       (format #t "rank ~a ~a:" rank nn)
       (format-line (* scale (time (ra-copy! ra20 ra21)))
                    (* scale (time (array-copy! a21 a20)))
                    (* scale (time (native-copy! a21 0 a20 0 (native-length a21))))))))
 (list #t 'f64)
 (list vector-copy! bytevector-copy!)
 (list vector-length bytevector-length))

(let ((m #e5e5))
  (format #t "\nra-fill! array-fill!\n==================\n")
  (for-each
      (lambda (type transposed?)
        (format #t "\ndst ~a transposed: ~a\n----------" type transposed?)
        (format-header "ra" "array")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra20 (make-ra-new type *unspecified* (apply c-dims nn)))
                     (ra20 (if transposed?
                             (apply ra-transpose ra20 (reverse (iota rank)))
                             ra20))
                     (a20 (ra->array ra20)))
                (format #t "rank ~a ~a:" rank nn)
                (format-line (* scale (time (ra-fill! ra20 77)))
                             (* scale (time (array-fill! a20 77))))))
          (iota 6 1)))
    (list #t 'f64 'u8 #t 'f64 'u8)
    (list #f #f #f #t #t #t)))

(for-each
 (lambda (type native-fill!)
   (let ((m #e1e7)
         (rank 1))
     (format #t "\ndst ~a ra-fill! array-fill! native-fill!\n==================\n" type)
     (format-header "ra" "array" "native")
     (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
            (nn (make-list rank n))
            (len (fold * 1 nn))
            (scale (* 1e9 (/ len)))
            (ra20 (make-ra-new type *unspecified* (apply c-dims nn)))
            (a20 (ra->array ra20)))
       (format #t "rank ~a ~a:" rank nn)
       (format-line (* scale (time (ra-fill! ra20 77)))
                    (* scale (time (array-fill! a20 77)))
                    (* scale (time (native-fill! a20 77)))))))
; would bench 'f64 but there's no f64vector-fill!
 (list #t 'u8)
 (list vector-fill! bytevector-fill!))

(let ((m #e5e5))
  (format #t "\nra-equal? array-equal?\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra" "array")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra20 (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (ra21 (ra-copy ra20))
                     (ra22 (apply ra-amend! (ra-copy ra21) (+ n 1) (map car (ra-shape ra21))))
                     (ra23 (apply ra-amend! (ra-copy ra21) (+ n 1) (map cadr (ra-shape ra21))))
                     (a20 (ra->array ra20))
                     (a21 (ra->array ra21))
                     (a22 (ra->array ra22))
                     (a23 (ra->array ra23)))
                (unless (ra-equal? ra20 ra21) (throw 'bad-ra-equal?-1))
                (unless (array-equal? a20 a21) (throw 'bad-array-equal?-1))
                (format #t "rank ~a ~a (#t):" rank nn)
                (format-line (* scale (time (ra-equal? ra20 ra21)))
                             (* scale (time (array-equal? a20 a21))))
                (when (ra-equal? ra20 ra22) (throw 'bad-ra-equal?-2))
                (when (array-equal? a20 a22) (throw 'bad-array-equal?-2))
                (format #t "rank ~a ~a (#f1):" rank nn)
                (format-line (* scale (time (ra-equal? ra20 ra22)))
                             (* scale (time (array-equal? a20 a22))))
                (when (ra-equal? ra20 ra23) (throw 'bad-ra-equal?-3))
                (when (array-equal? a20 a23) (throw 'bad-array-equal?-3))
                (format #t "rank ~a ~a (#f2):" rank nn)
                (format-line (* scale (time (ra-equal? ra20 ra23)))
                             (* scale (time (array-equal? a20 a23))))))
          (iota 6 1)))
    (list #t 'f64)))

(let ((m #e1e4))
  (format #t "\nprinting\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra" "array1" "array2")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e7 (/ len)))
                     (ra (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (a (ra->array ra)))
                (format #t "rank ~a ~a:" rank nn)
                (format-line (* scale (time (call-with-output-file "/dev/null" (cut display ra <>))))
                             (* scale (time (call-with-output-file "/dev/null" (cut array-print* a <>))))
                             (* scale (time (call-with-output-file "/dev/null" (cut display a <>)))))))
          (iota 6 1)))
    (list #t 'f64)))

(let ((m #e1e4))
  (format #t "\nreading\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra1" "ra2" "array")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e7 (/ len)))
                     (ra (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (sra1 (call-with-output-string (cut ra-print ra <> #:dims? #t)))
                     (sra2 (call-with-output-string (cut ra-print ra <> #:dims? #f)))
                     (a (ra->array ra))
                     (sa (call-with-output-string (cut display a <>))))
                (format #t "rank ~a ~a:" rank nn)
                (let ((rb #f) (b #f))
                  (format-line (* scale (time (set! rb (call-with-input-string sra1 read))))
                               (* scale (time (set! rb (call-with-input-string sra2 read))))
                               (* scale (time (set! b (call-with-input-string sa read)))))
                  (unless (array-equal? (ra->array rb) b) (throw 'bad-reading-benchmark)))))
          (iota 6 1)))
    (list #t 'f64)))

(let ((m #e1e5))
  (format #t "\nlist->ra\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra" "array" "ra/shape" "array/shape")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (la (ra->list ra))
                     (shape (map (lambda (len) (list 0 (- len 1))) nn)))
                (format #t "rank ~a ~a:" rank nn)
                (let ((rb #f) (b #f))
                  (format-line (* scale (time (set! rb (list->ra rank la))))
                               (* scale (time (set! b (list->array rank la))))
                               (* scale (time (set! rb (list->typed-ra type shape la))))
                               (* scale (time (set! b (list->typed-array type shape la)))))
                  (unless (array-equal? (ra->array ra) b) (throw 'bad-ra->list-benchmark))
                  (unless (array-equal? (ra->array rb) b) (throw 'bad-ra->list-benchmark)))))
          (iota 6 1)))
    (list #t 'f64)))

(let ((m #e1e5))
  (format #t "\nra->list\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra" "array")
        (for-each
            (lambda (rank)
              (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (array (ra->array ra))
                     (la (ra->list ra)))
                (format #t "rank ~a ~a:" rank nn)
                (let ((lb #f) (b #f))
                  (format-line (* scale (time (set! lb (ra->list ra))))
                               (* scale (time (set! b (array->list array)))))
                  (unless (equal? la b) (throw 'bad-ra->list-benchmark))
                  (unless (equal? lb b) (throw 'bad-ra->list-benchmark)))))
          (iota 6 1)))
    (list #t 'f64)))

(let ((m #e1e6))
  (format #t "\nra-fold\n==================\n")
  (format #t "handloop is flat let loop with inlined type-ref\n")
  (let-syntax
      ((%inline-type
        (syntax-rules ()
          ((_ type ref)
           (begin
             (format #t "\ndst ~a\n----------" type)
             (format-header "ra" "handloop")
             (for-each
                 (lambda (rank)
                   (let* ((n (inexact->exact (ceiling (expt m (/ rank)))))
                          (nn (make-list rank n))
                          (len (fold * 1 nn))
                          (scale (* 1e9 (/ len)))
                          (ra (ra-copy type (apply ra-i nn)))
                          (array (ra->array ra))
                          (root (array-contents array))
                          (expected (* m (- m 1) 1/2)))
                     (format #t "rank ~a ~a:" rank nn)
                     (let ((lb #f) (b #f))
                       (format-line
                        (* scale (time (set! lb (ra-fold + 0 ra))))
                        (* scale (time (set! b (let loop ((a 0) (i 0)) (if (= i len) a (loop (+ a (ref root i)) (+ 1 i))))))))
                       (unless (= lb expected) (throw 'bad-ra->list-benchmark))
                       (unless (= b expected) (throw 'bad-ra->list-benchmark)))))
               '(1 2 3 6)))))))
    (%inline-type #t vector-ref)
    (%inline-type 'f64 f64vector-ref)))


; -----------------------
; ra-index-map!
; -----------------------

(let ((m #e1e5))
  (format #t "\nra-index-map!\n==================\n")
  (for-each
      (lambda (type)
        (format #t "\ndst ~a\n----------" type)
        (format-header "ra" "array")
        (for-each
            (lambda (rank)
              (let* ((op (lambda x (apply + x)))
                     (n (inexact->exact (ceiling (expt m (/ rank)))))
                     (nn (make-list rank n))
                     (len (fold * 1 nn))
                     (scale (* 1e8 (/ len)))
                     (ra (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
                     (array (ra->array ra)))
                (format #t "rank ~a ~a:" rank nn)
                (format-line (* scale (time (ra-index-map! ra op)))
                             (* scale (time (array-index-map! array op))))
                (unless (array-equal? (ra->array ra) array) (throw 'bad-ra-index-map!-benchmark))))
          (iota 3 1)))
    (list #t 'f64)))


; -----------------------
; some profiling...
; -----------------------

(import (statprof))

(format #t "\nstatprof...\n==================\n")
(let* ((m #e5e4)
       (type #t)
       (rank 3)
       (n (inexact->exact (ceiling (expt (* 10 m) (/ rank)))))
       (nn (make-list rank n))
       (ra0 (make-ra-new type *unspecified* (apply c-dims nn)))
       (ra1 (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
       (ra2 (ra-map! (make-ra-new type 0 (apply c-dims nn)) (cut random n)))
       (s (call-with-output-string (cut display ra1 <>)))
       (prof (lambda () (call-with-input-string s read)))
       (prof (lambda () (ra-fill! ra0 99)))
       (prof (lambda () (ra-copy! ra2 ra1)))
       (prof (lambda () (ra-map! ra0 * ra1 ra2)))
       )
  (statprof prof #:count-calls? #t)
  prof)
